library(MASS)
library(ggplot2)
library(Hmisc)
library(reshape2)
library(plyr)
library(dplyr)
library(scales)
library(AER)
library(extrafont)
loadfonts(device="win")



################Organising the data################
# Read the article database and recode columns 5 to 51 as indicators:
# 1 where the cell is "Yes", 0 for any other value (missing cells stay NA).
Articles <- read.csv("C:/Users/Matthew/Documents/TRIP/Journal_Article_Database_Censored.csv")
Articles[, 5:51] <- ifelse(Articles[, 5:51] == "Yes", 1, 0)

# Issue-area relevance to each benchmark event: 1 when the article is coded 1
# on at least one of the event's issue-area columns, 0 otherwise.
Articles$relevantcollapseofUSSR <- as.numeric(
  Articles$BalanceofPower == 1 |
    Articles$ItnlRegimes == 1 |
    Articles$InterstateCrisis == 1
)
Articles$relevantregionalintegration <- as.numeric(
  Articles$Alliances == 1 |
    Articles$RegionalIntegration == 1 |
    Articles$IGO == 1
)
Articles$relevantSept11 <- as.numeric(Articles$Terrorism == 1)
Articles$relevantfinancialcrisis <- as.numeric(
  Articles$EconomicInterdependence == 1 |
    Articles$MonetaryPolicy == 1 |
    Articles$Trade == 1
)

# One data frame per benchmark event holding only its relevant articles, plus
# one for the articles that are relevant to none of the four events. These
# are used further down to count articles per year.
USSR_Articles                <- subset(Articles, relevantcollapseofUSSR == 1)
regionalintegration_Articles <- subset(Articles, relevantregionalintegration == 1)
Sept11_Articles              <- subset(Articles, relevantSept11 == 1)
financialcrisis_Articles     <- subset(Articles, relevantfinancialcrisis == 1)
other_Articles <- subset(
  Articles,
  relevantcollapseofUSSR == 0 &
    relevantregionalintegration == 0 &
    relevantSept11 == 0 &
    relevantfinancialcrisis == 0
)

# Geographic relevance: a second copy of the article database, recoded the
# same way (columns 5 to 51: 1 where the cell is "Yes", 0 for any other
# value), with one indicator per benchmark event for the region(s) tied to it.
geoArticles <- read.csv("C:/Users/Matthew/Documents/TRIP/Journal_Article_Database_Censored.csv")
geoArticles[, 5:51] <- ifelse(geoArticles[, 5:51] == "Yes", 1, 0)

geoArticles$georelevantcollapseofUSSR <- as.numeric(
  geoArticles$FSUEasternEurope == 1
)
geoArticles$georelevantregionalintegration <- as.numeric(
  geoArticles$SoutheastAsia == 1 |
    geoArticles$CanadaWestEurope == 1 |
    geoArticles$LatinAmerica == 1
)
geoArticles$georelevantSept11 <- as.numeric(
  geoArticles$MiddleEastNorthAfrica == 1
)
geoArticles$georelevantfinancialcrisis <- as.numeric(geoArticles$Global == 1)

# One data frame per benchmark event holding only the geographically relevant
# articles, plus one for articles relevant to none of the four regions.
USSR_geo_Articles                <- subset(geoArticles, georelevantcollapseofUSSR == 1)
regionalintegration_geo_Articles <- subset(geoArticles, georelevantregionalintegration == 1)
Sept11_geo_Articles              <- subset(geoArticles, georelevantSept11 == 1)
financialcrisis_geo_Articles     <- subset(geoArticles, georelevantfinancialcrisis == 1)
Other_geo_Articles <- subset(
  geoArticles,
  georelevantcollapseofUSSR == 0 &
    georelevantregionalintegration == 0 &
    georelevantSept11 == 0 &
    georelevantfinancialcrisis == 0
)

########################Generating Count Data##################
# Build 'Count': one row per publication year holding the number of articles
# in each benchmark event's issue area and geographic area.
#
# Every series is tabulated against the full set of years found in 'Articles',
# so each column lines up row-for-row with 'year' and a year without matching
# articles gets an explicit 0. Copying frequency columns across by position
# misaligns (or errors) as soon as one subset has no articles in some year.
# Base R's table() is used instead of a bare count() call because dplyr is
# attached after plyr, so count() resolves to dplyr::count(), which has no
# 'freq' column and does not take the column name as a string.
article_years <- sort(unique(Articles$year))

# Number of rows of 'articles' published in each year of 'article_years'.
count_by_year <- function(articles) {
  as.vector(table(factor(articles$year, levels = article_years)))
}

# NOTE: the column order below is relied on by the figure code, which selects
# columns of 'Count' by position.
Count <- data.frame(year = article_years)
Count$USSR                    <- count_by_year(USSR_Articles)
Count$regionalintegration     <- count_by_year(regionalintegration_Articles)
Count$Sept11                  <- count_by_year(Sept11_Articles)
Count$financialcrisis         <- count_by_year(financialcrisis_Articles)
Count$USSR_geo                <- count_by_year(USSR_geo_Articles)
Count$regionalintegration_geo <- count_by_year(regionalintegration_geo_Articles)
Count$Sept11_geo              <- count_by_year(Sept11_geo_Articles)
Count$financialcrisis_geo     <- count_by_year(financialcrisis_geo_Articles)
Count$total                   <- count_by_year(Articles)

# Creating the deflator variable: count * deflator is the series' share of
# that year's articles scaled to a base of 133 articles per year.
# NOTE(review): 133 is presumably the article total of a reference year --
# confirm.
Count$deflator <- 133 / Count$total

Count$USSR.deflated                <- round(Count$deflator * Count$USSR, digits = 0)
Count$regionalintegration.deflated <- round(Count$deflator * Count$regionalintegration, digits = 0)
Count$Sept11.deflated              <- round(Count$deflator * Count$Sept11, digits = 0)
Count$financialcrisis.deflated     <- round(Count$deflator * Count$financialcrisis, digits = 0)

Count$USSR.geo.deflated                <- round(Count$deflator * Count$USSR_geo, digits = 0)
Count$regionalintegration.geo.deflated <- round(Count$deflator * Count$regionalintegration_geo, digits = 0)
Count$Sept11.geo.deflated              <- round(Count$deflator * Count$Sept11_geo, digits = 0)
Count$financialcrisis.geo.deflated     <- round(Count$deflator * Count$financialcrisis_geo, digits = 0)

# Creating the before/after variables: 1 for years strictly after the event
# year, 0 for the event year and earlier.
Count$USSR.ba                  <- ifelse(Count$year > 1989, 1, 0)
Count$regionalintegration.ba   <- ifelse(Count$year > 1992, 1, 0)
Count$Sept11.ba                <- ifelse(Count$year > 2001, 1, 0)
Count$financialcrisis.ba       <- ifelse(Count$year > 2008, 1, 0)

# Creating a dataframe named 'P' with the same kind of yearly count data as
# 'Count', but broken down by the paradigm an article advances
# ('Articles$Paradigm').
#
# Each paradigm is tabulated against the full set of years found in
# 'Articles', so every column lines up row-for-row with 'year' and a year in
# which a paradigm has no articles gets an explicit 0. Copying frequency
# columns across by position misaligns (or errors) when a paradigm is absent
# in some year. Base R's table() is used instead of a bare count() call
# because dplyr is attached after plyr, so count() resolves to dplyr::count(),
# which has no 'freq' column and does not take the column name as a string.
paradigm_years <- sort(unique(Articles$year))

# Number of articles per year of 'paradigm_years' whose Paradigm equals
# 'paradigm'; articles with a missing Paradigm are never counted.
count_paradigm_by_year <- function(paradigm) {
  in_paradigm <- Articles$Paradigm %in% paradigm
  as.vector(table(factor(Articles$year[in_paradigm], levels = paradigm_years)))
}

# NOTE: the column order below is relied on by the Figure 3 code, which
# selects columns of 'P' by position.
P <- data.frame(year = paradigm_years)
P$Realist         <- count_paradigm_by_year("Realist")
P$Liberal         <- count_paradigm_by_year("Liberal")
P$Constructivist  <- count_paradigm_by_year("Constructivist")
P$Marxist         <- count_paradigm_by_year("Marxist")
P$Nonparadigmatic <- count_paradigm_by_year("Non-paradigmatic")
P$AtheoreticNon   <- count_paradigm_by_year("Atheoretic/Non")
P$total           <- as.vector(table(factor(Articles$year, levels = paradigm_years)))

# Creating the other variables for P.
# count * deflator is the paradigm's share of that year's articles scaled to
# a base of 133 articles per year.
# NOTE(review): 133 is presumably the article total of a reference year --
# confirm.
P$deflator <- (133 / P$total)

P$Realist.deflated         <- round(P$Realist * P$deflator, digits = 0)
P$Liberal.deflated         <- round(P$Liberal * P$deflator, digits = 0)
P$Constructivist.deflated  <- round(P$Constructivist * P$deflator, digits = 0)
P$Marxist.deflated         <- round(P$Marxist * P$deflator, digits = 0)
P$Nonparadigmatic.deflated <- round(P$Nonparadigmatic * P$deflator, digits = 0)
P$AtheoreticNon.deflated   <- round(P$AtheoreticNon * P$deflator, digits = 0)

# Before/after indicators: 1 for years strictly after the event year, 0 for
# the event year and earlier.
P$USSR.ba                  <- ifelse(P$year > 1989, 1, 0)
P$regionalintegration.ba   <- ifelse(P$year > 1992, 1, 0)
P$Sept11.ba                <- ifelse(P$year > 2001, 1, 0)
P$financialcrisis.ba       <- ifelse(P$year > 2008, 1, 0)

####################Running the regressions##################
# Negative binomial regressions (MASS::glm.nb) of yearly article counts on the
# before/after indicator(s) of the benchmark events. Each group is fitted
# twice: on the raw counts and on the deflated counts.

# Issue area, raw counts
glm.nb1 <- glm.nb(formula = USSR                ~ USSR.ba,                data = Count)
glm.nb2 <- glm.nb(formula = regionalintegration ~ regionalintegration.ba, data = Count)
glm.nb3 <- glm.nb(formula = Sept11              ~ Sept11.ba,              data = Count)
glm.nb4 <- glm.nb(formula = financialcrisis     ~ financialcrisis.ba,     data = Count)

# Issue area, deflated counts
glm.nb5 <- glm.nb(formula = USSR.deflated                ~ USSR.ba,                data = Count)
glm.nb6 <- glm.nb(formula = regionalintegration.deflated ~ regionalintegration.ba, data = Count)
glm.nb7 <- glm.nb(formula = Sept11.deflated              ~ Sept11.ba,              data = Count)
glm.nb8 <- glm.nb(formula = financialcrisis.deflated     ~ financialcrisis.ba,     data = Count)

# The tables in the article were built by reading summary() of these models.

# Geographic area, raw counts
glm.nb9  <- glm.nb(formula = USSR_geo                ~ USSR.ba,                data = Count)
glm.nb10 <- glm.nb(formula = regionalintegration_geo ~ regionalintegration.ba, data = Count)
glm.nb11 <- glm.nb(formula = Sept11_geo              ~ Sept11.ba,              data = Count)
glm.nb12 <- glm.nb(formula = financialcrisis_geo     ~ financialcrisis.ba,     data = Count)

# Geographic area, deflated counts
glm.nb13 <- glm.nb(formula = USSR.geo.deflated                ~ USSR.ba,                data = Count)
glm.nb14 <- glm.nb(formula = regionalintegration.geo.deflated ~ regionalintegration.ba, data = Count)
glm.nb15 <- glm.nb(formula = Sept11.geo.deflated              ~ Sept11.ba,              data = Count)
glm.nb16 <- glm.nb(formula = financialcrisis.geo.deflated     ~ financialcrisis.ba,     data = Count)

# Paradigm, raw counts: each paradigm is regressed on all four event indicators
glm.nb17 <- glm.nb(
  formula = Realist ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)
glm.nb18 <- glm.nb(
  formula = Liberal ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)
glm.nb19 <- glm.nb(
  formula = Constructivist ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)
glm.nb20 <- glm.nb(
  formula = Marxist ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)
glm.nb21 <- glm.nb(
  formula = Nonparadigmatic ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)
glm.nb22 <- glm.nb(
  formula = AtheoreticNon ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)

# Paradigm, deflated counts
glm.nb23 <- glm.nb(
  formula = Realist.deflated ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)
glm.nb24 <- glm.nb(
  formula = Liberal.deflated ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)
glm.nb25 <- glm.nb(
  formula = Constructivist.deflated ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)
glm.nb26 <- glm.nb(
  formula = Marxist.deflated ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)
glm.nb27 <- glm.nb(
  formula = Nonparadigmatic.deflated ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)
glm.nb28 <- glm.nb(
  formula = AtheoreticNon.deflated ~ USSR.ba + regionalintegration.ba + Sept11.ba + financialcrisis.ba,
  data = P
)

#We exclude Marxism in the final table because of the small n; there are 89 Marxist papers in
#the entire TRIP database

##############Statistics on Academic Rank and Authorship############
# Report the share of articles written by authors of a given academic rank in
# the window before and the window after a benchmark event.
#
# Arguments:
#   df    - data frame of articles with columns 'year' and 'A0Rank'.
#   date  - year of the event. The "before" window is (date - years, date],
#           so it includes the event year; the "after" window is
#           (date, date + years].
#   years - length of each window, in years.
#   rank  - value of 'A0Rank' to report on, e.g. "Assistant Professor".
#
# Prints a two-line summary and invisibly returns c(before = , after = ):
# the two shares in percent, NA for a window that contains no articles.
function.rank <- function(df, date, years, rank){
  in_before <- df$year > (date - years) & df$year <= date
  in_after  <- df$year > date & df$year <= (date + years)
  has_rank  <- df$A0Rank == rank

  # Percent of the articles in a window that were written by 'rank'. Rows with
  # a missing year or rank are treated as not matching (na.rm = TRUE), and an
  # empty window yields NA instead of the NaN that 0 / 0 would give.
  percent_with_rank <- function(in_window) {
    n_window <- sum(in_window, na.rm = TRUE)
    if (n_window == 0) {
      return(NA_real_)
    }
    100 * sum(in_window & has_rank, na.rm = TRUE) / n_window
  }

  shares <- c(before = percent_with_rank(in_before),
              after  = percent_with_rank(in_after))

  # The shares are multiplied by 100 above so the word "percent" in the
  # message is accurate; they are rounded for display only.
  cat(
    sprintf("In the %s years before the event, %s percent of articles were written by a/an %s;\n",
            years, round(shares[["before"]], 1), rank),
    sprintf("in the %s years after the event, %s percent of articles were written by a/an %s.\n",
            years, round(shares[["after"]], 1), rank),
    sep = ""
  )

  invisible(shares)
}
# Usage example: Assistant Professor authorship among the articles in
# USSR_geo_Articles over the 10 years on either side of 1989.
function.rank(USSR_geo_Articles, date = 1989, years = 10, rank = "Assistant Professor")


###############Figure 1########################
# Line chart of the deflated yearly issue-area counts, one line type per
# benchmark event.
#
# Columns are selected and relabelled by name. Positional indices break as
# soon as a column is added to 'Count', and a bare rename() call resolves to
# dplyr::rename() (dplyr is attached after plyr), which expects new = old
# pairs rather than plyr's c(old = new) vector.
fig1_labels <- c("USSR.deflated"                = "Fall of the USSR",
                 "regionalintegration.deflated" = "Regional Integration",
                 "Sept11.deflated"              = "September 11",
                 "financialcrisis.deflated"     = "Financial Crisis")

df <- Count[, c("year", names(fig1_labels))]
names(df) <- c("year", unname(fig1_labels))

# Long format: one row per (year, event) pair, as geom_line() needs.
df <- melt(df, id = "year")

ggplot(data = df, aes(x = year, y = value, group = variable)) +
  geom_line(aes(linetype = variable),
            size = 1) +
  scale_linetype_manual(values = c(1, 6, 5, 3), name = "Benchmark Event") +
  xlab("Year") +
  ylab("Article Count in Issue Area") +
  ggtitle("Figure 1: Yearly Counts of Articles discussing a Benchmark Event's Issue Area from 1980 to 2012") +
  theme(text = element_text(family = "Times New Roman"), plot.title = element_text(size = 16))


#####################Figure 2################
# Line chart of the deflated yearly geographic-area counts, one line type per
# benchmark event.
#
# Columns are selected and relabelled by name. Positional indices break as
# soon as a column is added to 'Count', and a bare rename() call resolves to
# dplyr::rename() (dplyr is attached after plyr), which expects new = old
# pairs rather than plyr's c(old = new) vector. The "Regional Integration"
# label no longer carries a stray leading space.
fig2_labels <- c("USSR.geo.deflated"                = "Fall of the USSR",
                 "regionalintegration.geo.deflated" = "Regional Integration",
                 "Sept11.geo.deflated"              = "September 11",
                 "financialcrisis.geo.deflated"     = "Financial Crisis")

df <- Count[, c("year", names(fig2_labels))]
names(df) <- c("year", unname(fig2_labels))

# Long format: one row per (year, event) pair, as geom_line() needs.
df <- melt(df, id = "year")

ggplot(data = df, aes(x = year, y = value, group = variable)) +
  geom_line(aes(linetype = variable),
            size = 1) +
  scale_linetype_manual(values = c(1, 6, 5, 3), name = "Benchmark Event") +
  xlab("Year") +
  ylab("Article Count in Geographic Area") +
  ggtitle("Figure 2: Yearly Counts of Articles discussing a Benchmark Event's Geographic Area from 1980 to 2012") +
  theme(text = element_text(family = "Times New Roman"), plot.title = element_text(size = 16))


#############Figure 3################
# Line chart of the deflated yearly article counts, one line type per
# paradigm.
#
# Columns are selected and relabelled by name. Positional indices break as
# soon as a column is added to 'P', and a bare rename() call resolves to
# dplyr::rename() (dplyr is attached after plyr), which expects new = old
# pairs rather than plyr's c(old = new) vector.
fig3_labels <- c("Realist.deflated"         = "Realist",
                 "Liberal.deflated"         = "Liberal",
                 "Constructivist.deflated"  = "Constructivist",
                 "Marxist.deflated"         = "Marxist",
                 "Nonparadigmatic.deflated" = "Nonparadigmatic",
                 "AtheoreticNon.deflated"   = "AtheoreticNon")

P.def <- P[, c("year", names(fig3_labels))]
names(P.def) <- c("year", unname(fig3_labels))

# Long format: one row per (year, paradigm) pair, as geom_line() needs.
P.def <- melt(P.def, id = "year")

# The x-axis label is capitalised to match Figures 1 and 2.
ggplot(data = P.def, aes(x = year, y = value, group = variable)) +
  geom_line(aes(linetype = variable),
            size = 1) +
  scale_linetype_manual(values = c(1, 4, 3, 2, 6, 5), name = "Paradigm") +
  xlab("Year") +
  ylab("Article Count in Paradigm") +
  ggtitle("Figure 3: Yearly Counts of Articles Advancing a Given Paradigm from 1980 to 2012") +
  theme(text = element_text(family = "Times New Roman"), plot.title = element_text(size = 16))


